{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from sklearn import datasets\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Load the handwritten-digits dataset that ships with scikit-learn.\n",
    "digits = datasets.load_digits()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def display_img(img_no):\n",
    "    \"\"\"Draw one entry of ``digits.images`` as a matrix plot.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    img_no : int\n",
    "        Position of the image inside ``digits.images``.\n",
    "\n",
    "    The figure is created as a side effect; nothing is returned.\n",
    "    \"\"\"\n",
    "    _, axes = plt.subplots()\n",
    "    # Blank out the tick labels on both axes before drawing the image.\n",
    "    for blank_labels in (axes.set_xticklabels, axes.set_yticklabels):\n",
    "        blank_labels([])\n",
    "    axes.matshow(digits.images[img_no], cmap=plt.cm.binary)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAO0AAADtCAYAAABTTfKPAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAABPNJREFUeJzt3TFuE2sYQNHxEwWUoaSkpSMpKdJ4CWQrUIbOa2AJziay\ngGQBIAV6KFxC59e8Aj2RgK3kZ645p8QZPgvmaizhn2+x3W4noOOfP/0GgN2IFmJECzGihRjRQoxo\nIebRXS8uFgv/HgR/yHa7Xfzs1++M9r8Ldx52fn4+nZ+f73zdvvadd3Fxsde89Xo9nZ2d7Xzdmzdv\n9pq32Wymo6Ojna9bLpd7zbu6uppOTk52vm61Wu01b7VaTW/fvt35un3+TKZp7P2576zF4qe9TtPk\n4zHkiBZiHiTa09PTh/htZzPvxYsXQ+c9fvx46Lxnz54Nnffq1auh80beLw8xS7R7GB3tkydPhs4T\n7bxn+XgMMaKFGNFCjGghRrQQI1qIES3EiBZifnlg4McvO5+eng7/IgP8DS4vL6fLy8vf+tmdogUe\nxv8fiO/evbv1Z308hhjRQoxoIUa0ECNaiBEtxIgWYkQLMaKFGNFCjGgh5pffPT5k+/6P//v6/Pnz\n0HmbzWbovKdPnw6dt16vh857/fr10Hm38aSFGNFCjGghRrQQI1qIES3EiBZiRAsxooUY0UKMaCFG\ntBAjWoixFgRmwFoQiLEWBA6YaCFGtBAjWogRLcSIFmJECzGihRjRQoxoIUa0ECNaiJnVLp/r6+uh\n80bv1rm5uRk67/nz50PnLZfLofNG3y92+QB7ES3EiBZiRAsxooUY0UKMaCFGtBAjWogRLcSIFmJE\nCzGihRi7fGAG7PKBGLt84ICJFmJECzGihRjRQoxoIUa0ECNaiBEtxIgWYkQLMaKFmFnt8tlsNkPn\nvXz5cui80bt1Rjs+Pv7Tb+Gv4EkLMaKFGNFCjGghRrQQI1qIES3EiBZiRAsxooUY0UKMaCFGtBBj\nlw/MgF0+EGOXDxww0UKMaCFGtBAjWogRLcSIFmJECzGihRjRQoxoIUa0EPNX7/JZLpdD5x260X9/\nR0dHQ+fNhSctxIgWYkQLMaKFGNFCjGghRrQQI1qIES3EiBZiRAsxooUY0UKMXT4wA3b5QIxdPnDA\nRAsxooUY0UKMaCFGtBAjWogRLcSIFmJECzGihRjRQsysdvmM3s1yfX09dN5oo3frXF1dDZ13dnY2\ndN5ceNJCjGghRrQQI1qIES3EiBZiRAsxooUY0UKMaCFGtBAjWogRLcTY5QMzYJcPxNjlAwdMtBAj\nWogRLcSIFmJECzGihRjRQoxoIUa0ECNaiBEtxCy22+3tLy4W27tev2+fPn0aNmuapun4+HjovPfv\n3w+dd3FxMXTezc3N0HmHvItpsVhM2+128bPXPGkhRrQQI1qIES3EiBZiRAsxooUY0UKMaCFGtBAj\nWogRLcSIFmLs8oEZ2GWXj6N5Azmad78czQMSRAsxooUY0UKMaCFGtBAjWogRLcSIFmJECzGihRjR\nQsysDgyMNvoL/KvVaui8k5OTofPW6/XQeYfMgQE4IKKFGNFCjGghRrQQI1qIES3EiBZiRAsxooUY\n0UKMaCFGtBBjlw/MQHaXz2iO5t0vR/Puj6N5cEBECzGihRjRQoxoIUa0ECNaiBEtxDxItL/7zY7q\nvA8fPgyd9+3bt6Hzvnz5MnTeId8vDzFLtHv4+PHj0Hnfv38fOu/r169D5x3y/ZKJFng4ooWYXx4Y\nGPhegB/cdmDgzmiB+fHxGGJECzGihRjRQoxoIeZffu8xKwIJF6EAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x10c33e898>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Preview the first image in the dataset.\n",
    "display_img(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[  0.,   0.,   5.,  13.,   9.,   1.,   0.,   0.],\n",
       "       [  0.,   0.,  13.,  15.,  10.,  15.,   5.,   0.],\n",
       "       [  0.,   3.,  15.,   2.,   0.,  11.,   8.,   0.],\n",
       "       [  0.,   4.,  12.,   0.,   0.,   8.,   8.,   0.],\n",
       "       [  0.,   5.,   8.,   0.,   0.,   9.,   8.,   0.],\n",
       "       [  0.,   4.,  11.,   0.,   1.,  12.,   7.,   0.],\n",
       "       [  0.,   2.,  14.,   5.,  10.,  12.,   0.,   0.],\n",
       "       [  0.,   0.,   6.,  13.,  10.,   0.,   0.,   0.]])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Raw pixel values of the first image (an 8x8 array).\n",
    "digits.images[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(64,)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Row 0 of digits.data is a flat vector with 64 entries.\n",
    "digits.data[0].shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Target value recorded for the first image.\n",
    "digits.target[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from sklearn.metrics.pairwise import cosine_similarity\n",
    "from sklearn.metrics.pairwise import chi2_kernel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Feature matrix (one flattened image per row) and the matching targets.\n",
    "X, y = digits.data, digits.target"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Chi-squared kernel between the first image (kept 2-D via X[:1]) and every image.\n",
    "k_sim = chi2_kernel(X[:1], X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[  1.00000000e+000,   7.57695024e-116,   1.95599924e-105, ...,\n",
       "          1.29644889e-083,   2.49956726e-051,   1.10169569e-079]])"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Peek at the kernel row: entry 0 is the self-match (1.0), the rest are tiny.\n",
    "k_sim"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# One row per image (indexed by its position in X), one 'similarity' column.\n",
    "kf = pd.DataFrame(k_sim.T, columns=['similarity'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>similarity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1167</th>\n",
       "      <td>1.644255e-07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>877</th>\n",
       "      <td>1.040593e-07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>464</th>\n",
       "      <td>1.232666e-08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1541</th>\n",
       "      <td>8.598399e-09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1365</th>\n",
       "      <td>8.274881e-09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1029</th>\n",
       "      <td>1.907361e-09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>855</th>\n",
       "      <td>1.487874e-10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1697</th>\n",
       "      <td>1.191874e-10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>957</th>\n",
       "      <td>1.870301e-11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1463</th>\n",
       "      <td>1.714631e-12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1236</th>\n",
       "      <td>1.528919e-13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>646</th>\n",
       "      <td>8.264444e-14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>335</th>\n",
       "      <td>7.758213e-14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>812</th>\n",
       "      <td>4.250581e-14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>276</th>\n",
       "      <td>2.589843e-14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>305</th>\n",
       "      <td>1.141329e-14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>806</th>\n",
       "      <td>1.101042e-14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>311</th>\n",
       "      <td>6.290698e-15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1128</th>\n",
       "      <td>5.081774e-15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1494</th>\n",
       "      <td>4.847325e-15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>166</th>\n",
       "      <td>4.115303e-15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>516</th>\n",
       "      <td>2.819098e-15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>642</th>\n",
       "      <td>1.384852e-15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>229</th>\n",
       "      <td>1.246194e-15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1002</th>\n",
       "      <td>1.177180e-15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>676</th>\n",
       "      <td>8.403675e-16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1745</th>\n",
       "      <td>7.786381e-16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>396</th>\n",
       "      <td>7.525816e-16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>941</th>\n",
       "      <td>6.983304e-16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>558</th>\n",
       "      <td>1.580173e-124</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>537</th>\n",
       "      <td>1.045260e-124</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1718</th>\n",
       "      <td>8.244835e-125</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>987</th>\n",
       "      <td>3.545141e-125</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>832</th>\n",
       "      <td>1.385241e-125</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>947</th>\n",
       "      <td>1.010415e-125</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1551</th>\n",
       "      <td>8.810739e-126</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1000</th>\n",
       "      <td>4.658142e-126</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1648</th>\n",
       "      <td>3.785987e-126</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>572</th>\n",
       "      <td>3.661237e-126</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>994</th>\n",
       "      <td>3.042558e-126</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>312</th>\n",
       "      <td>2.429941e-126</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>953</th>\n",
       "      <td>3.466028e-127</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1437</th>\n",
       "      <td>1.789318e-127</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>986</th>\n",
       "      <td>7.574662e-128</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>215</th>\n",
       "      <td>5.942922e-128</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1640</th>\n",
       "      <td>1.382383e-128</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>336</th>\n",
       "      <td>5.279808e-129</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1380</th>\n",
       "      <td>2.082809e-129</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1329</th>\n",
       "      <td>4.867742e-131</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>517</th>\n",
       "      <td>1.673149e-131</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1334</th>\n",
       "      <td>7.601630e-132</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1626</th>\n",
       "      <td>4.137893e-132</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>341</th>\n",
       "      <td>2.398919e-132</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>623</th>\n",
       "      <td>6.193922e-133</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1585</th>\n",
       "      <td>1.176835e-133</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>916</th>\n",
       "      <td>4.820881e-134</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1213</th>\n",
       "      <td>1.319706e-134</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1631</th>\n",
       "      <td>5.139275e-138</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>609</th>\n",
       "      <td>2.381570e-138</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1797 rows × 1 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         similarity\n",
       "0      1.000000e+00\n",
       "1167   1.644255e-07\n",
       "877    1.040593e-07\n",
       "464    1.232666e-08\n",
       "1541   8.598399e-09\n",
       "1365   8.274881e-09\n",
       "1029   1.907361e-09\n",
       "855    1.487874e-10\n",
       "1697   1.191874e-10\n",
       "957    1.870301e-11\n",
       "1463   1.714631e-12\n",
       "1236   1.528919e-13\n",
       "646    8.264444e-14\n",
       "335    7.758213e-14\n",
       "812    4.250581e-14\n",
       "276    2.589843e-14\n",
       "305    1.141329e-14\n",
       "806    1.101042e-14\n",
       "311    6.290698e-15\n",
       "1128   5.081774e-15\n",
       "1494   4.847325e-15\n",
       "166    4.115303e-15\n",
       "516    2.819098e-15\n",
       "642    1.384852e-15\n",
       "229    1.246194e-15\n",
       "1002   1.177180e-15\n",
       "676    8.403675e-16\n",
       "1745   7.786381e-16\n",
       "396    7.525816e-16\n",
       "941    6.983304e-16\n",
       "...             ...\n",
       "558   1.580173e-124\n",
       "537   1.045260e-124\n",
       "1718  8.244835e-125\n",
       "987   3.545141e-125\n",
       "832   1.385241e-125\n",
       "947   1.010415e-125\n",
       "1551  8.810739e-126\n",
       "1000  4.658142e-126\n",
       "1648  3.785987e-126\n",
       "572   3.661237e-126\n",
       "994   3.042558e-126\n",
       "312   2.429941e-126\n",
       "953   3.466028e-127\n",
       "1437  1.789318e-127\n",
       "986   7.574662e-128\n",
       "215   5.942922e-128\n",
       "1640  1.382383e-128\n",
       "336   5.279808e-129\n",
       "1380  2.082809e-129\n",
       "1329  4.867742e-131\n",
       "517   1.673149e-131\n",
       "1334  7.601630e-132\n",
       "1626  4.137893e-132\n",
       "341   2.398919e-132\n",
       "623   6.193922e-133\n",
       "1585  1.176835e-133\n",
       "916   4.820881e-134\n",
       "1213  1.319706e-134\n",
       "1631  5.139275e-138\n",
       "609   2.381570e-138\n",
       "\n",
       "[1797 rows x 1 columns]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Rank every image by chi-squared similarity to image 0, best match first.\n",
    "kf.sort_values(by='similarity', ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAO0AAADtCAYAAABTTfKPAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAABM5JREFUeJzt3bFx1GoUgNHVmxcQUgCBS9iAgAyXsCU4JKQE0wEhISVA\nBw7JcEABLsGhM5G8gHmDjVfj/dEnnxOyXq4x+kYrkOZO8zzvgI5//vY3ABxHtBAjWogRLcSIFmJE\nCzH/PvTiNE3+Pwj+knmep9/9+oPR/vfGo4ddXl7uLi8vj37fUkvnXV9fL5r36dOn3bt3745+3+Fw\nWDTv9vZ29/Lly2Hzvn37tnvz5s3R77u4uFg0b+nPc7/fL5o38vhcOmuaftvrbrfz8RhyRAsxJ4n2\n/Pz8FL/taua9fv166LwXL14Mnffq1auh80b/PEceL6eYNT10zTpN07zle5OXXtMutfQaszJv6TXt\nUkuvaQumabr3H6J8PIYY0UKMaCFGtBAjWogRLcSIFmJECzF/fGDg15udz8/Ph999BM/B1dXV7urq\n6lFf646ogdwR9bTcEQUkiBZiRAsxooUY0UKMaCFGtBAjWogRLcSIFmJECzF/fGBgy0ZuQdjtdou2\nBJTmjb7X+bE32D+Vs7OzofPu40wLMaKFGNFCjGghRrQQI1qIES3EiBZiRAsxooUY0UKMaCFGtBBj\nLQiswDFrQY6KFjiN/58QP3z4cO/X+ngMMaKFGNFCjGghRrQQI1qIES3EiBZiRAsxooUY0UKMaCFm\nVbt8bm5uhs77+vXr0Hnfv38fOm+/3w+dd3t7O3Te6OPFLh9gEdFCjGghRrQQI1qIES3EiBZiRAsx\nooUY0UKMaCFGtBAjWoixywdWwC4fiLHLBzZMtBAjWogRLcSIFmJECzGihRjRQoxoIUa0ECNaiBEt\nxKxql8/19fXQeW/fvh06b/RundEOh8PQeY99KuaprOUJN2daiBEtxIgWYkQLMaKFGNFCjGghRrQQ\nI1qIES3EiBZiRAsxooUYu3xgBezygRi7fGDDRAsxooUY0UKMaCFGtBAjWogRLcSIFmJECzGihRjR\nQsyz3uXjiaWnNfrv77lypoUY0UKMaCFGtBAjWogRLcSIFmJECzGihRjRQoxoIUa0ECNaiLHLB1bA\nLh+IscsHNky0ECNaiBEtxIgWYkQLMaKFGNFCjGghRrQQI1qIES3ErGqXz36/Hzrv8+fPQ+eNdnNz\nM3Te6IdLHvtUzNY400KMaCFGtBAjWogRLcSIFmJECzGihRjRQoxoIUa0ECNaiBEtxNjlAytglw/E\n2OUDGyZaiBEtxIgWYkQLMaKFGNFCjGghRrQQI1qIES3EiBZipnme739xmuaHXq87OzsbOu/jx49D\n541+2OPi4mLovPfv3w+dN9I0Tbt5nqffveZMCzGihRjRQoxoIUa0ECNaiBEtxIgWYkQLMaKFGNFC\njGghRrQQY5cPrIBdPhBjlw9smGghRrQQI1qIES3EiBZiRAsxooUY0UKMaCFGtBAjWoh51rt8vnz5\nMnTe6N0zo3cHHQ6HofO2zC4f2BDRQoxoIUa0ECNaiBEtxIgWYkQLMaKFGNFCjGghRrQQI1qIscsH\nVsAuH4ixywc2TLQQI1qIES3EiBZiRAsxooUY0ULMSaJ97J0d1Xk/fvwYOu/u7m7ovNF/vi0fL6eY\nJdoFRPu0tny8ZKIFTke0EPPHXT4DvxfgF/ft8nkwWmB9fDyGGNFCjGghRrQQI1qI+QlR1yOA76Xr\nFAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x10dbcf550>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Show the closest chi-squared match, skipping the query image (index 0) itself.\n",
    "# Computed from kf instead of hard-coding the index read off the sorted table.\n",
    "display_img(kf['similarity'].drop(0).idxmax())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Cosine similarity between the first image (kept 2-D via X[:1]) and every image.\n",
    "co_sim = cosine_similarity(X[:1], X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# One row per image (indexed by its position in X), one 'similarity' column.\n",
    "cosf = pd.DataFrame(co_sim.T, columns=['similarity'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>similarity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>877</th>\n",
       "      <td>0.980739</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>464</th>\n",
       "      <td>0.974474</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1365</th>\n",
       "      <td>0.974188</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1541</th>\n",
       "      <td>0.971831</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1167</th>\n",
       "      <td>0.971130</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1029</th>\n",
       "      <td>0.970858</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>396</th>\n",
       "      <td>0.968793</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1697</th>\n",
       "      <td>0.966019</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>646</th>\n",
       "      <td>0.965490</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1342</th>\n",
       "      <td>0.963990</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>160</th>\n",
       "      <td>0.961824</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>957</th>\n",
       "      <td>0.960468</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>335</th>\n",
       "      <td>0.959937</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1463</th>\n",
       "      <td>0.958401</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>855</th>\n",
       "      <td>0.958079</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>229</th>\n",
       "      <td>0.957180</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>642</th>\n",
       "      <td>0.956975</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>682</th>\n",
       "      <td>0.956633</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>812</th>\n",
       "      <td>0.954502</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>276</th>\n",
       "      <td>0.953733</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>311</th>\n",
       "      <td>0.953675</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>725</th>\n",
       "      <td>0.953565</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>0.953453</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>666</th>\n",
       "      <td>0.952949</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>516</th>\n",
       "      <td>0.952674</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>305</th>\n",
       "      <td>0.952255</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>724</th>\n",
       "      <td>0.951774</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1494</th>\n",
       "      <td>0.951671</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>458</th>\n",
       "      <td>0.951614</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1372</th>\n",
       "      <td>0.486735</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1613</th>\n",
       "      <td>0.486612</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>517</th>\n",
       "      <td>0.485804</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>972</th>\n",
       "      <td>0.484865</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1000</th>\n",
       "      <td>0.479740</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1357</th>\n",
       "      <td>0.479682</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>623</th>\n",
       "      <td>0.476078</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>215</th>\n",
       "      <td>0.473342</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107</th>\n",
       "      <td>0.472412</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1634</th>\n",
       "      <td>0.471961</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>777</th>\n",
       "      <td>0.469736</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>85</th>\n",
       "      <td>0.466485</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>994</th>\n",
       "      <td>0.466440</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1590</th>\n",
       "      <td>0.463689</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1380</th>\n",
       "      <td>0.456986</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>609</th>\n",
       "      <td>0.456557</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1377</th>\n",
       "      <td>0.449912</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>267</th>\n",
       "      <td>0.449633</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>336</th>\n",
       "      <td>0.446742</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1621</th>\n",
       "      <td>0.442270</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1551</th>\n",
       "      <td>0.440442</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>341</th>\n",
       "      <td>0.430761</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1648</th>\n",
       "      <td>0.426239</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1329</th>\n",
       "      <td>0.425764</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1640</th>\n",
       "      <td>0.420540</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1334</th>\n",
       "      <td>0.420014</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1585</th>\n",
       "      <td>0.402730</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1213</th>\n",
       "      <td>0.393677</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1631</th>\n",
       "      <td>0.368377</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1626</th>\n",
       "      <td>0.361120</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1797 rows × 1 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      similarity\n",
       "0       1.000000\n",
       "877     0.980739\n",
       "464     0.974474\n",
       "1365    0.974188\n",
       "1541    0.971831\n",
       "1167    0.971130\n",
       "1029    0.970858\n",
       "396     0.968793\n",
       "1697    0.966019\n",
       "646     0.965490\n",
       "1342    0.963990\n",
       "160     0.961824\n",
       "957     0.960468\n",
       "335     0.959937\n",
       "1463    0.958401\n",
       "855     0.958079\n",
       "229     0.957180\n",
       "642     0.956975\n",
       "682     0.956633\n",
       "812     0.954502\n",
       "276     0.953733\n",
       "311     0.953675\n",
       "725     0.953565\n",
       "30      0.953453\n",
       "666     0.952949\n",
       "516     0.952674\n",
       "305     0.952255\n",
       "724     0.951774\n",
       "1494    0.951671\n",
       "458     0.951614\n",
       "...          ...\n",
       "1372    0.486735\n",
       "1613    0.486612\n",
       "517     0.485804\n",
       "972     0.484865\n",
       "1000    0.479740\n",
       "1357    0.479682\n",
       "623     0.476078\n",
       "215     0.473342\n",
       "107     0.472412\n",
       "1634    0.471961\n",
       "777     0.469736\n",
       "85      0.466485\n",
       "994     0.466440\n",
       "1590    0.463689\n",
       "1380    0.456986\n",
       "609     0.456557\n",
       "1377    0.449912\n",
       "267     0.449633\n",
       "336     0.446742\n",
       "1621    0.442270\n",
       "1551    0.440442\n",
       "341     0.430761\n",
       "1648    0.426239\n",
       "1329    0.425764\n",
       "1640    0.420540\n",
       "1334    0.420014\n",
       "1585    0.402730\n",
       "1213    0.393677\n",
       "1631    0.368377\n",
       "1626    0.361120\n",
       "\n",
       "[1797 rows x 1 columns]"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Rank every image by cosine similarity to image 0, best match first.\n",
    "cosf.sort_values(by='similarity', ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAO0AAADtCAYAAABTTfKPAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAABMdJREFUeJzt3bFRG2kYgOHdmwsdOHRIQAGUQOiQDqAElQAduASXQOhQ\nBTigBEKHInO2F9wFNx7Algb93ld+nhBp50MavbM7g5ZvXpZlAjr++t2/ALAf0UKMaCFGtBAjWogR\nLcT8/dqD8zz7exD8JsuyzM/9/NVo/ztw72G3t7fT7e3t3scd6tB5nz59Omjely9fpo8fP+593OfP\nnw+a9+3bt+nDhw97H3d1dXXQvO12O11eXu593CHHTNO/78vNzc2weSM/n4fOmudne52myeUx5IgW\nYo4S7aGXLZV55+fnQ+e9e/du6Lyzs7Oh8y4uLobOG/l5OcYs0R5AtG9LtPtxeQwxooUY0UKMaCFG\ntBAjWogRLcSIFmJ+esPA/7/sfHl5OfyLDPAn2G6303a7/aXn7hUtcBw/nhDv7u5efK7LY4gRLcSI\nFmJECzGihRjRQoxoIUa0ECNaiBEtxIgWYubXNgjM87yM3BS/2+2GzZqm8f91cPTNFqNf36EbFA71\n8PAwdN7I93Oe5xfXgjjTQoxoIUa0ECNaiBEtxIgWYkQLMaKFGNFCjGghRrQQI1qIES3EWAsCK7DP\nWhC35g3k1ry35dY8IEG0ECNaiBEtxIgWYkQLMaKFGNFCjGghRrQQI1qIES3E/PQun5FGfwH86elp\n6Lyrq6uh825ubobOe//+/dB59/f3Q+dtNpuh817iTAsxooUY0UKMaCFGtBAjWogRLcSIFmJECzGi\nhRjRQoxoIUa0EGOXD6zAPrt89ooWOI4fT4h3d3cvPtflMcSIFmJECzGihRjRQoxoIUa0ECNaiBEt\nxIgWYkQLMaKFmFXt8hnt+vp66LzRu3VGG73LZ7fbDZ23Fs60ECNaiBEtxIgWYkQLMaKFGNFCjGgh\nRrQQI1qIES3EiBZiRAsxdvnACtjlAzF2+cAJEy3EiBZiRAsxooUY0UKMaCFGtBAjWogRLcSIFmJE\nCzF/9C4f3tafultnNGdaiBEtxIgWYkQLMaKFGNFCjGghRrQQI1qIES3EiBZiRAsxooUYu3xgBezy\ngRi7fOCEiRZiRAsxooUY0UKMaCFGtBAjWogRLcSIFmJECzGihZhV7fIZvQvm8fFx6LxT96t3qbyV\nzWYzdN5aONNCjGghRrQQI1qIES3EiBZiRAsxooUY0UKMaCFGtBAjWogRLcTY5QMrYJcPxNjlAydM\ntBAjWogRLcSIFmJECzGihRjRQoxoIUa0ECNaiBEtxKxql8/Z2dnQeQ8PD0Pn3d/fD503erfO6N1I\nFxcXQ+ethTMtxIgWYkQLMaKFGNFCjGghRrQQI1qIES3EiBZiRAsxooUY0UKMXT6wAnb5QIxdPnDC\nRAsxooUY0UKMaCFGtBAjWogRLcSIFmJECzGihRjRQsyqdvmM3s2y2WxOet7o3UijdxWNfn1r4UwL\nMaKFGNFCjGghRrQQI1qIES3EiBZiRAsxooUY0UKMaCFGtBBjlw+sgF0+EGOXD5ww0UKMaCFGtBAj\nWogRLcSIFmJECzFHifZXv9lRnff4+Dh03vfv34fO2+12Q+d9/fp16LyRn5djzBLtAUT7tkS7H5fH\nECNaiJmXZXn5wXl++UHgqJZlmZ/7+avRAuvj8hhiRAsxooUY0UKMaCHmH68rElEe1RxYAAAAAElF\nTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x10dda6a58>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Show the closest cosine match, skipping the query image (index 0) itself.\n",
    "# Computed from cosf instead of hard-coding the index read off the sorted table.\n",
    "display_img(cosf['similarity'].drop(0).idxmax())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAO0AAADtCAYAAABTTfKPAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAABIJJREFUeJzt3c1NW1sUgFHfpzTgKTOgBRfAhBZogTZogxZogQkF0IIZ\nMsQl+E3eINIj/Fj4+H43aw2TOPtK1ifj5Bztab/fr4COf079AMD3iBZiRAsxooUY0UKMaCHm10e/\nOU2T/w+CE9nv99N7v/5htP+98NvD7u7uVnd3d99+3aEq866vrw+at91uV5eXl99+3cXFxUHznp+f\nV5vN5tuvu7+/P2he5f0bOWua3u11tVr58RhyRAsxR4n26urqGH/tXztvvV4PnXd2djZ03pLfv2PM\nmj76zjpN097Z5J9z6HfaQx36nfZQh36n5f+mafrjP0T58RhiRAsxooUY0UKMaCFGtBAjWogRLcR8\nemHg98POV1dXw0+vwN/g6elp9fT09KU/60TUQE5E8VVORMGCiBZiRAsxooUY0UKMaCFGtBAjWogR\nLcSIFmJECzHOHg80+uzx8/Pz0Hm73W7ovCVz9hgWRLQQI1qIES3EiBZiRAsxooUY0UKMaCFGtBAj\nWogRLcSIFmKsBYEZsBZkplzN46tczYMFES3EiBZiRAsxooUY0UKMaCFGtBAjWogRLcSIFmJECzGf\n3vKha71en/oROAKftBAjWogRLcSIFmJECzGihRjRQoxoIUa0ECNaiBEtxIgWYkQLMXb5wAzY5TNT\no3f5bLfbofNeXl6Gzlsyu3xgQUQLMaKFGNFCjGghRrQQI1qIES3EiBZiRAsxooUY0UKMCwMDTdO7\n57+P5vz8fOg8FwZ+jgsDsCCihRjRQoxoIUa0ECNaiBEtxIgWYkQLMaKFGNFCjGghRrQQY5cPzIBd\nPjPlah5f5WoeLIhoIUa0ECNaiBEtxIgWYkQLMaKFGNFCjGghRrQQI1qI+fSWDz9n9AF+lsknLcSI\nFmJECzGihRjRQoxoIUa0ECNaiBEtxIgWYkQLMaKFGNFCjF0+MAN2+czUxcXFqR/hqOzy+Tl2+cCC\niBZiRAsxooUY0UKMaCFGtBAjWogRLcSIFmJECzGihRi7fBbs+vr61I/AEfikhRjRQoxoIUa0ECNa\niBEtxIgWYkQLMaKFGNFCjGghRrQQI1qIscsHZsAun5kavctn9NW8+/v7ofOWzC4fWBDRQoxoIUa0\nECNaiBEtxIgWYkQLMaKFGNFCjGghRrQQY5fPQJvNZui83W43dB5j+KSFGNFCjGghRrQQI1qIES3E\niBZiRAsxooUY0UKMaCFGtBAjWoixywdmwC6fmbq5uTn1IxzVw8PDqR9hMezygQURLcSIFmJECzGi\nhRjRQoxoIUa0ECNaiBEtxIgWYkQLMXb5DLRer4fOe3l5GTqPMXzSQoxoIUa0ECNaiBEtxIgWYkQL\nMaKFGNFCjGghRrQQI1qIES3E2OUDM2CXz0zd3t4OnTf6at7j4+PQeUtmlw8siGghRrQQI1qIES3E\niBZiRAsxooWYo0T71ZMd5n3N6+vr0Hlvb29D5y35/TvGLNEG5o2OdrfbDZ235PcvEy1wPKKFmE8v\nDAx8FuA3f7ow8GG0wPz48RhiRAsxooUY0UKMaCHmX1ydOmft43HTAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x10ddedef0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Show the image with the lowest cosine similarity to image 0.\n",
    "# Computed from cosf instead of hard-coding the index read off the sorted table.\n",
    "display_img(cosf['similarity'].idxmin())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
